# Load necessary libraries
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(raster) #raster()
## Loading required package: sp
## The legacy packages maptools, rgdal, and rgeos, underpinning the sp package,
## which was just loaded, will retire in October 2023.
## Please refer to R-spatial evolution reports for details, especially
## https://r-spatial.org/r/2023/05/15/evolution4.html.
## It may be desirable to make the sf package available;
## package maintainers should consider adding sf to Suggests:.
## The sp package is now running under evolution status 2
## (status 2 uses the sf package in place of rgdal)
##
## Attaching package: 'raster'
##
## The following object is masked from 'package:dplyr':
##
## select
library(sf) #st_read()
## Linking to GEOS 3.8.0, GDAL 3.0.4, PROJ 6.3.1; sf_use_s2() is TRUE
library(ggspatial) #annotation_scale,annotation_north_arrow
library(ggnewscale) #new_scale_color()
library(ggsn) #scalebar()
## Loading required package: grid
## Warning: multiple methods tables found for 'elide'
##
## Attaching package: 'ggsn'
##
## The following object is masked from 'package:raster':
##
## scalebar
library(shiny) #Shiny app
## This version of 'bslib' is designed to work with 'shiny' >= 1.6.0.
## Please upgrade via install.packages('shiny').
library(plotly) #plot_ly()
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:raster':
##
## select
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
library(gridExtra) #grid.arrange()
##
## Attaching package: 'gridExtra'
##
## The following object is masked from 'package:dplyr':
##
## combine
# Set working directory
setwd("~/FinalProject")
# Read data files
Unemployrate <-read_csv("/home/rstudio/FinalProject/unemployment_county.csv")
## Rows: 37674 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): County, State
## dbl (2): Unemployment Rate, Year
## num (3): Labor Force, Employed, Unemployed
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(Unemployrate)
## # A tibble: 6 × 7
## County State `Labor Force` Employed Unemployed `Unemployment Rate` Year
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Autauga Cou… AL 24383 23577 806 3.3 2007
## 2 Baldwin Cou… AL 82659 80099 2560 3.1 2007
## 3 Barbour Cou… AL 10334 9684 650 6.3 2007
## 4 Bibb County AL 8791 8432 359 4.1 2007
## 5 Blount Coun… AL 26629 25780 849 3.2 2007
## 6 Bullock Cou… AL 3653 3308 345 9.4 2007
Crimerate<-read_csv("/home/rstudio/FinalProject/crime_and_incarceration_by_state.csv")
## Rows: 816 Columns: 17
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): jurisdiction
## dbl (13): year, prisoner_count, state_population, violent_crime_total, murde...
## lgl (3): includes_jails, crime_reporting_change, crimes_estimated
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(Crimerate)
## # A tibble: 6 × 17
## jurisdiction includes_jails year prisoner_count crime_reporting_change
## <chr> <lgl> <dbl> <dbl> <lgl>
## 1 FEDERAL FALSE 2001 149852 NA
## 2 ALABAMA FALSE 2001 24741 FALSE
## 3 ALASKA TRUE 2001 4570 FALSE
## 4 ARIZONA FALSE 2001 27710 FALSE
## 5 ARKANSAS FALSE 2001 11489 FALSE
## 6 CALIFORNIA FALSE 2001 157142 FALSE
## # ℹ 12 more variables: crimes_estimated <lgl>, state_population <dbl>,
## # violent_crime_total <dbl>, murder_manslaughter <dbl>, rape_legacy <dbl>,
## # rape_revised <dbl>, robbery <dbl>, agg_assault <dbl>,
## # property_crime_total <dbl>, burglary <dbl>, larceny <dbl>,
## # vehicle_theft <dbl>
States<-st_read("/home/rstudio/FinalProject/tl_2019_us_state.shp")
## Reading layer `tl_2019_us_state' from data source
## `/home/rstudio/FinalProject/tl_2019_us_state.shp' using driver `ESRI Shapefile'
## Simple feature collection with 56 features and 14 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -179.2311 ymin: -14.60181 xmax: 179.8597 ymax: 71.43979
## Geodetic CRS: NAD83
######## Focus on Contiguous USA
Contiguous_state <- States%>% filter(STUSPS!="AK"&
STUSPS!="AS"&
STUSPS!="MP"&
STUSPS!="PR"&
STUSPS!="VI"&
STUSPS!="HI"&
STUSPS!="GU")
###Check the length of states.49 is what we need.
length(unique(Contiguous_state$STUSPS))
## [1] 49
##Check the length of State
unique(Unemployrate $State)
## [1] "AL" "AK" "AZ" "AR" "CA" "CO" "CT" "DE" "FL" "GA" "HI" "ID" "IL" "IN" "IA"
## [16] "KS" "KY" "LA" "ME" "MD" "MA" "MI" "MN" "MS" "MO" "MT" "NE" "NV" "NH" "NJ"
## [31] "NM" "NY" "NC" "ND" "OH" "OK" "OR" "PA" "RI" "SC" "SD" "TN" "TX" "UT" "VT"
## [46] "VA" "WA" "WV" "WI" "WY"
# Check the length of unique states in Unemployment data
length(unique(Unemployrate $State))
## [1] 50
# Clean Unemployment data
## `summarise()` has grouped output by 'State'.
Unemployrate <- Unemployrate %>% filter(State!="AK"& State!="HI") %>%
group_by(State,Year) %>%
summarise(Totalforce=sum(`Labor Force`),
Totalemployed=sum(Employed),
Totalunemployed=sum(Unemployed),
Meanrate=mean(`Unemployment Rate`,rm.na=TRUE)
)
## `summarise()` has grouped output by 'State'. You can override using the
## `.groups` argument.
### Check the length of unique states in Unemployment data again
length(unique(Unemployrate $State))
## [1] 48
head(Unemployrate)
## # A tibble: 6 × 6
## # Groups: State [1]
## State Year Totalforce Totalemployed Totalunemployed Meanrate
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AL 2007 2175614 2089131 86483 4.9
## 2 AL 2008 2176491 2053479 123012 6.98
## 3 AL 2009 2163003 1924749 238254 13.2
## 4 AL 2010 2196053 1964561 231492 12.3
## 5 AL 2011 2202672 1990414 212258 11.2
## 6 AL 2012 2176343 2003292 173051 9.25
# Rename column and filter years
colnames(Unemployrate)
## [1] "State" "Year" "Totalforce" "Totalemployed"
## [5] "Totalunemployed" "Meanrate"
##Change the column name “State” into "STUSPS" by using rename().Use filter() to pick out year data from 2007 to 2014.
Unemployrate <- Unemployrate %>%
filter(Year %in% c(2007:2014))
head(Unemployrate)
## # A tibble: 6 × 6
## # Groups: State [1]
## State Year Totalforce Totalemployed Totalunemployed Meanrate
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AL 2007 2175614 2089131 86483 4.9
## 2 AL 2008 2176491 2053479 123012 6.98
## 3 AL 2009 2163003 1924749 238254 13.2
## 4 AL 2010 2196053 1964561 231492 12.3
## 5 AL 2011 2202672 1990414 212258 11.2
## 6 AL 2012 2176343 2003292 173051 9.25
unique(Crimerate$jurisdiction)
## [1] "FEDERAL" "ALABAMA" "ALASKA" "ARIZONA"
## [5] "ARKANSAS" "CALIFORNIA" "COLORADO" "CONNECTICUT"
## [9] "DELAWARE" "FLORIDA" "GEORGIA" "HAWAII"
## [13] "IDAHO" "ILLINOIS" "INDIANA" "IOWA"
## [17] "KANSAS" "KENTUCKY" "LOUISIANA" "MAINE"
## [21] "MARYLAND" "MASSACHUSETTS" "MICHIGAN" "MINNESOTA"
## [25] "MISSISSIPPI" "MISSOURI" "MONTANA" "NEBRASKA"
## [29] "NEVADA" "NEW HAMPSHIRE" "NEW JERSEY" "NEW MEXICO"
## [33] "NEW YORK" "NORTH CAROLINA" "NORTH DAKOTA" "OHIO"
## [37] "OKLAHOMA" "OREGON" "PENNSYLVANIA" "RHODE ISLAND"
## [41] "SOUTH CAROLINA" "SOUTH DAKOTA" "TENNESSEE" "TEXAS"
## [45] "UTAH" "VERMONT" "VIRGINIA" "WASHINGTON"
## [49] "WEST VIRGINIA" "WISCONSIN" "WYOMING"
### Check the length of unique states in Crime data
length(unique(Crimerate$jurisdiction))
## [1] 51
head(Crimerate)
## # A tibble: 6 × 17
## jurisdiction includes_jails year prisoner_count crime_reporting_change
## <chr> <lgl> <dbl> <dbl> <lgl>
## 1 FEDERAL FALSE 2001 149852 NA
## 2 ALABAMA FALSE 2001 24741 FALSE
## 3 ALASKA TRUE 2001 4570 FALSE
## 4 ARIZONA FALSE 2001 27710 FALSE
## 5 ARKANSAS FALSE 2001 11489 FALSE
## 6 CALIFORNIA FALSE 2001 157142 FALSE
## # ℹ 12 more variables: crimes_estimated <lgl>, state_population <dbl>,
## # violent_crime_total <dbl>, murder_manslaughter <dbl>, rape_legacy <dbl>,
## # rape_revised <dbl>, robbery <dbl>, agg_assault <dbl>,
## # property_crime_total <dbl>, burglary <dbl>, larceny <dbl>,
## # vehicle_theft <dbl>
# Clean Crime data
###Change the column name
Crimerate <- Crimerate %>%
rename("STUSPS"="jurisdiction") %>%
rename("Year"="year") %>%
filter(STUSPS!="FEDERAL"& STUSPS!="ALASKA"& STUSPS!="HAWAII") %>%
filter(Year %in% c(2007:2014))
##Recheck the data
length(unique(Crimerate$STUSPS))
## [1] 48
head(Crimerate)
## # A tibble: 6 × 17
## STUSPS includes_jails Year prisoner_count crime_reporting_change
## <chr> <lgl> <dbl> <dbl> <lgl>
## 1 ALABAMA FALSE 2007 25253 FALSE
## 2 ARIZONA FALSE 2007 37700 FALSE
## 3 ARKANSAS FALSE 2007 13275 FALSE
## 4 CALIFORNIA FALSE 2007 171444 FALSE
## 5 COLORADO FALSE 2007 22666 FALSE
## 6 CONNECTICUT TRUE 2007 19438 FALSE
## # ℹ 12 more variables: crimes_estimated <lgl>, state_population <dbl>,
## # violent_crime_total <dbl>, murder_manslaughter <dbl>, rape_legacy <dbl>,
## # rape_revised <dbl>, robbery <dbl>, agg_assault <dbl>,
## # property_crime_total <dbl>, burglary <dbl>, larceny <dbl>,
## # vehicle_theft <dbl>
###Changes the state names in the state column "STUSPS"
Crimerate$STUSPS <- state.abb[match(str_to_title(Crimerate$STUSPS),state.name)]
###Calculate the crimerate
Crimerate <- Crimerate %>%
mutate(Crimerate=(violent_crime_total/state_population)*100000) %>%
dplyr::mutate_if(is.numeric, round, 1)
library(plotly)
# Select the states
selected_states <- c("IL", "CA", "ID", "IA")
#head(Unemployrate)
# Time Series plots for Unemployment Rate
# Filter the data for the selected states
Unemployrate_selected <- Unemployrate %>% filter(State %in% selected_states)
head(Unemployrate_selected)
## # A tibble: 6 × 6
## # Groups: State [1]
## State Year Totalforce Totalemployed Totalunemployed Meanrate
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 CA 2007 17893090 16931594 961496 6.78
## 2 CA 2008 18178125 16854483 1323642 8.64
## 3 CA 2009 18215141 16182572 2032569 12.4
## 4 CA 2010 18336283 16091950 2244333 13.8
## 5 CA 2011 18415107 16258137 2156970 13.4
## 6 CA 2012 18523796 16602671 1921125 12.1
# Create time series line plot using plotly
fig <- plot_ly()
#for(state in selected_states){
fig <- plot_ly( Unemployrate_selected, x = ~Year, y=~Meanrate, color=~State, type="scatter", mode="lines") %>%
layout(title = "Time Series Visualization for Selected States",
xaxis = list(title = "Year"),
yaxis = list(title = "Unemployment Rate"))
fig
# Time Series plots for Crime Rate
Crimerate_selected <- Crimerate %>% filter(STUSPS %in% selected_states)
head(Crimerate_selected)
## # A tibble: 6 × 18
## STUSPS includes_jails Year prisoner_count crime_reporting_change
## <chr> <lgl> <dbl> <dbl> <lgl>
## 1 CA FALSE 2007 171444 FALSE
## 2 ID FALSE 2007 7055 FALSE
## 3 IL FALSE 2007 45215 FALSE
## 4 IA FALSE 2007 8692 FALSE
## 5 CA FALSE 2008 171085 FALSE
## 6 ID FALSE 2008 7033 FALSE
## # ℹ 13 more variables: crimes_estimated <lgl>, state_population <dbl>,
## # violent_crime_total <dbl>, murder_manslaughter <dbl>, rape_legacy <dbl>,
## # rape_revised <dbl>, robbery <dbl>, agg_assault <dbl>,
## # property_crime_total <dbl>, burglary <dbl>, larceny <dbl>,
## # vehicle_theft <dbl>, Crimerate <dbl>
fig2 <- plot_ly()
#for(state in selected_states){
fig2 <- plot_ly( Crimerate_selected, x = ~Year, y=~Crimerate, color=~STUSPS , type="scatter", mode="lines") %>%
layout(title = "Time Series Visualization for Selected States",
xaxis = list(title = "Year"),
yaxis = list(title = "Crime Rate"))
fig2
# Scatter plot for the year 2014
Unemployrate_2014 <- Unemployrate %>% filter(Year == 2014)
Crimerate_2014 <- Crimerate %>% filter(Year == 2014)
# Join datasets for the year 2014
Crimerate <- Crimerate %>% rename(State = STUSPS)
colnames(Crimerate_2014)
## [1] "STUSPS" "includes_jails" "Year"
## [4] "prisoner_count" "crime_reporting_change" "crimes_estimated"
## [7] "state_population" "violent_crime_total" "murder_manslaughter"
## [10] "rape_legacy" "rape_revised" "robbery"
## [13] "agg_assault" "property_crime_total" "burglary"
## [16] "larceny" "vehicle_theft" "Crimerate"
Crimerate_2014 <- Crimerate_2014 %>% rename(State = STUSPS)
colnames(Unemployrate_2014)
## [1] "State" "Year" "Totalforce" "Totalemployed"
## [5] "Totalunemployed" "Meanrate"
colnames(Crimerate_2014)
## [1] "State" "includes_jails" "Year"
## [4] "prisoner_count" "crime_reporting_change" "crimes_estimated"
## [7] "state_population" "violent_crime_total" "murder_manslaughter"
## [10] "rape_legacy" "rape_revised" "robbery"
## [13] "agg_assault" "property_crime_total" "burglary"
## [16] "larceny" "vehicle_theft" "Crimerate"
data_2014 <- left_join(Unemployrate_2014, Crimerate_2014, by = c("State", "Year"))
# Scatter plot for the relationship between Unemployment Rate and Crime Rate in 2014
fig <- plot_ly(data_2014, x = ~Crimerate, y = ~Meanrate, type = "scatter", mode = "markers", color = ~State) %>%
layout(title = "Relationship between Unemployment Rate and Crime Rate in 2014",
xaxis = list(title = "Crime Rate"),
yaxis = list(title = "Unemployment Rate"))
fig
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
# Spatial plot for Unemployment Rate in 2014
# Join the data
data_2014 <- left_join(Contiguous_state, Unemployrate_2014, by = c("STUSPS" = "State"))
# Create the plot
p <- ggplot() +
geom_sf(data = data_2014, aes(fill = Meanrate)) +
labs(title = "Unemployment Rate in 2014",
fill = "Unemployment Rate",
x = "Longitude",
y = "Latitude") +
theme_minimal() +
coord_sf(datum = NA) +
north(data_2014, scale = 0.1, symbol = 3) +
scalebar(data_2014, dist = 100, dist_unit = "km", transform = TRUE)
# Print the plot
print(p)
# Spatial plot for Crime Rate in 2014
p2 <- ggplot() +
geom_sf(data = data_2014, aes(fill = Crimerate)) +
labs(title = "Crime Rate in 2014", fill = "Crime Rate") +
theme_minimal()
data_2014 <- left_join(Contiguous_state, Crimerate_2014, by = c("STUSPS" = "State"))
p2 <- ggplot() +
geom_sf(data = data_2014, aes(fill = Crimerate)) +
labs(title = "Crime Rate in 2014", fill = "Crime Rate",
x = "Longitude",
y = "Latitude") +
theme_minimal()
p2 <- p2 + north(data_2014) + scalebar(data_2014, dist = 100, dist_unit = "km", transform = TRUE)
# Print the plots
print(p2)
Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.